/**********************************************************************/
/*
   Author: Karan Makkar
   Created: Oct  2023
   Updated: Aug 2025, by Youssef Assarssah
   Description: Demographic Match Shares between Survey and Admin Data,
   by survey wave
   Output: Table A12 
   */
/**********************************************************************/

* Filepath globals: loaded here unless $master_run is "1"
  if "$master_run" != "1" include "./Do/SET_FILEPATHS.do"

* Start from an empty session
  clear all
  set matsize 11000
  set more off

* Log: close any log left open, then open a date-prefixed text log
  capture log close
  global prefix: display %tdCYND td(`c(current_date)')
  log using "$KP_logs/${prefix}_demographics_match_bywave.txt", text replace

* Switches
  * admin: 1 = rebuild the admin_s21 file before the survey sections, 0 = skip
  local admin 0

/*----------------------------------------------------*/
* Section: Run programs and set dataset filepaths
/*----------------------------------------------------*/

* One global per survey wave, named after the wave and holding the path of
* that wave's cleaned, merged file.

* Sakernas waves
  foreach wave in aug20 feb21 aug21 {
    global `wave' "$KP_deid_sakernas/Clean/sak_`wave'_deid_clean_merged.dta"
  }

* Susenas waves
  foreach wave in sep20 mar21 sep21 mar22 {
    global `wave' "$KP_deid_susenas/Clean/sus_`wave'_deid_clean_merged.dta"
  }


  /************
   Save Admin Data Tempfiles
   ************/
  * Runs only when the admin switch is 1; otherwise the merges further down
  * read whatever admin_s21 file already exists on disk.
  if `admin' == 1 {
    use "$KP_deid_admin/Clean/pmo_b1-22_clean_long_deid.dta", clear

    ** 2021
    preserve
      generate diff = abs(17 - batch)

      * Keep one row per anon_id4: the first after ordering by batch, then diff.
      * NOTE(review): diff is a function of batch, so it cannot change the
      * order once batch is a sort key -- confirm that the earliest batch is
      * the row that is meant to be kept.
      sort anon_id4 batch diff
      by anon_id4: keep if _n == 1

      save "$KP_deid_admin/Clean/admin_s21", replace
    restore
  }

  /************
   Sakernas
   ************/
  * Stack the three Sakernas waves. The code below uses first_apply_batch,
  * batch, education, year_dob and anon_prov_id before the admin merge, so the
  * *_clean_merged files are expected to already carry those fields.
  use "${aug20}", clear
  append using "${feb21}"
  append using "${aug21}"
  
  * Apply before survey var: 1 if the first application batch is at or below
  * the cutoff batch used for that survey round
  gen apply_before_survey = 0
  replace apply_before_survey  = 1 if sak_round == 5 & first_apply_batch <=3
  replace apply_before_survey  = 1 if sak_round == 6 & first_apply_batch <=11
  replace apply_before_survey  = 1 if sak_round == 7 & first_apply_batch <=17

  keep if apply_before_survey == 1

  * Keep one row per person and round: the first after ordering by batch, then diff.
  * NOTE(review): diff is a function of batch, so it cannot change the order
  * once batch is a sort key -- confirm that the earliest batch is intended.
  gen diff = abs(17-batch)
  bysort anon_id4 sak_round (batch diff): keep if _n ==1
  drop diff
  
  * Province: attach the anonymised province id for the survey province code
   rename kode_prov prov_id_sak
   merge m:1 prov_id_sak using "$KP_deid_misc/Clean/provid_anon_crosswalk", nogen assert(1 3) keepusing(anon_prov_id_sak)

  * Gender
  * Left missing when female is missing, so an unknown survey gender is not
  * silently coded as 0.
  gen gender_sak = female ==0 if !missing(female)
  * gender is dropped here and re-read from admin_s21 in the merge below
  drop gender

  *Educ cleaning
  
  *First, make consistent across sak waves
  * Round 7 codes are shifted down onto the scale used by the other rounds.
  * The replaces run in ascending order, so a recoded value is never recoded again.
  replace educ = 5 if educ ==6 & sak_round==7
  replace educ = 6 if educ ==7 & sak_round==7
  replace educ = 7 if educ ==8 & sak_round==7
  replace educ = 8 if inrange(educ, 9, 12) & sak_round==7
  
  *Next, make 5 category versions of each

  *SAK
  gen educ_sak = .
  replace educ_sak = 1 if inrange(educ, 1, 2) // Elementary or Lower
  replace educ_sak = 2 if educ ==3 // Junior High
  replace educ_sak = 3 if inrange(educ, 4, 5) // Senior High School + Vocational
  replace educ_sak = 4 if inrange(educ, 6, 8) // Above HS

  *PMO
  gen educ_pmo = .
  replace educ_pmo = 1 if education ==1 // Elementary or Lower
  replace educ_pmo = 2 if education ==2 // Junior High
  replace educ_pmo = 3 if education ==3 // Senior High School + Vocational
  replace educ_pmo = 4 if inrange(education, 4, 8) // Above Vocational
  
  * Merge in Admin data
  * assert(2 3): every survey row must find its anon_id4 in admin_s21;
  * keep(3) then drops admin-only rows.
  fmerge m:1 anon_id4 using "$KP_deid_admin/Clean/admin_s21", assert(2 3) keep(3) nogen keepusing(gender) 

  * Demographics match
  * Stata evaluates missing == missing as true, so each equality check below
  * also requires both sides to be non-missing. A missing value therefore
  * counts as a non-match, as it already does for agematch (abs(.) <= 2 is false).
  gen agematch = abs(year_dob - year_dob_sak) <=2
  la var agematch "Age Match"
  gen gendermatch = (gender == gender_sak) & !missing(gender, gender_sak)
  la var gendermatch "Gender Match"
  gen provincematch = (anon_prov_id == anon_prov_id_sak) & !missing(anon_prov_id, anon_prov_id_sak)
  la var provincematch "Province Match"
  gen educmatch = (educ_pmo == educ_sak) & !missing(educ_pmo, educ_sak)
  la var educmatch "Educ Match"
  gen match3 = agematch == 1 & gendermatch == 1 & provincematch == 1 
  la var match3 "Age, Gender, Province Match"
  gen match4 = agematch == 1 & gendermatch == 1 & provincematch == 1 & educmatch ==1
  la var match4 "Age, Gender, Province, Educ Match"
  * Same definition as the Susenas match3_sus (no province), so the row lines
  * up across both surveys in the table.
  gen match3_sus = agematch == 1 & gendermatch == 1 & educmatch == 1 
  la var match3_sus "Age, Gender, Educ Match"

  * Variables summarised in every Sakernas column
  local matchvars agematch gendermatch provincematch educmatch match3_sus match4

  * Panel A Sakernas, All Admin Appliers
  * t1 = all rounds stacked; t2-t4 = rounds 5-7
  eststo t1: quietly estpost summarize `matchvars'
  forvalues wave = 5/7 {
    local col = `wave' - 3
    eststo t`col': quietly estpost summarize `matchvars' if sak_round == `wave'
  }

  * Panel B Sakernas, Report Appliers         (report_applied == 1): t10 stacked, t11-t13 by round
  * Panel C Sakernas, Report Didn't Apply     (report_applied == 0): t14 stacked, t15-t17 by round
  local col = 10
  foreach applied in 1 0 {
    eststo t`col': quietly estpost summarize `matchvars' if report_applied == `applied'
    forvalues wave = 5/7 {
      local ++col
      eststo t`col': quietly estpost summarize `matchvars' if sak_round == `wave' & report_applied == `applied'
    }
    local ++col
  }

  /************
   Susenas
   ************/
  * Stack the four Susenas waves. The code below uses first_apply_batch,
  * batch, education and year_dob before the admin merge, so the
  * *_clean_merged files are expected to already carry those fields.
  use "${sep20}", clear
  append using "${mar21}"
  append using "${sep21}"
  append using "${mar22}"

  * Apply before survey var: 1 if the first application batch is at or below
  * the cutoff batch used for that survey round
  gen apply_before_survey = 0
  replace apply_before_survey  = 1 if sus_round == 5 & first_apply_batch <=5
  replace apply_before_survey  = 1 if sus_round == 6 & first_apply_batch <=11
  replace apply_before_survey  = 1 if sus_round == 7 & first_apply_batch <=18
  replace apply_before_survey  = 1 if sus_round == 8 & first_apply_batch <=22

  keep if apply_before_survey ==1

  * Keep one row per person and round: the first after ordering by batch, then diff.
  * NOTE(review): diff is a function of batch, so it cannot change the order
  * once batch is a sort key -- confirm that the earliest batch is intended.
  gen diff = abs(17-batch)
  bysort anon_id4 sus_round (batch diff): keep if _n ==1
  drop diff 

  * Gender
  * Left missing when female is missing, so an unknown survey gender is not
  * silently coded as 0.
  gen gender_sus = female ==0 if !missing(female)
  * gender is dropped here and re-read from admin_s21 in the merge below
  drop gender

  *Educ cleaning
  
  *SUS
  * Each round uses its own educ coding, so the 4-category recode is done per round.

  *Sep20
  gen educ_sus = .
  replace educ_sus = 1 if sus_round ==5 & inrange(educ, 1, 2) // Elementary or Lower
  replace educ_sus = 2 if sus_round ==5 & educ ==3 // Junior High
  replace educ_sus = 3 if sus_round ==5 & educ ==4 // Senior High School + Vocational
  replace educ_sus = 4 if sus_round ==5 & educ ==5 // Above Vocational

  *Mar 21
  replace educ_sus = 1 if sus_round ==6 & (inrange(educ, 1, 4) | educ == 22) // Elementary or Lower
  replace educ_sus = 2 if sus_round ==6 & inrange(educ, 5, 8) // Junior High
  replace educ_sus = 3 if sus_round ==6 & inrange(educ, 9, 14) // Senior High School + Vocational
  replace educ_sus = 4 if sus_round ==6 & inrange(educ, 15, 21) // Above Vocational

  *Sep21
  replace educ_sus = 1 if sus_round ==7 & (inrange(educ, 1, 5) | educ == 25) // Elementary or Lower
  replace educ_sus = 2 if sus_round ==7 & inrange(educ, 6, 10) // Junior High
  replace educ_sus = 3 if sus_round ==7 & inrange(educ, 11, 16) // Senior High School + Vocational
  replace educ_sus = 4 if sus_round ==7 & inrange(educ, 17, 24) // Above Vocational

  *Mar22
  * NOTE(review): unlike Mar 21 (code 22) and Sep21 (code 25), no extra code is
  * mapped to category 1 here -- confirm against the Mar22 codebook.
  replace educ_sus = 1 if sus_round ==8 & (inrange(educ, 1, 5)) // Elementary or Lower
  replace educ_sus = 2 if sus_round ==8 & inrange(educ, 6, 10) // Junior High
  replace educ_sus = 3 if sus_round ==8 & inrange(educ, 11, 16) // Senior High School + Vocational
  replace educ_sus = 4 if sus_round ==8 & inrange(educ, 17, 24) // Above Vocational



  *PMO
  gen educ_pmo = .
  replace educ_pmo = 1 if education ==1 // Elementary or Lower
  replace educ_pmo = 2 if education ==2 // Junior High
  replace educ_pmo = 3 if education ==3 // Senior High School + Vocational
  replace educ_pmo = 4 if inrange(education, 4, 8) // Above Vocational
  
  * Merge in Admin data
  * assert(2 3): every survey row must find its anon_id4 in admin_s21;
  * keep(3) then drops admin-only rows.
  fmerge m:1 anon_id4 using "$KP_deid_admin/Clean/admin_s21", assert(2 3) keep(3) nogen keepusing(gender) 

  * combine get_pk and hh_pk_win 
  * Rounds 6 and 7 take hh_pk_win; rounds 5 and 8 take get_pk.
  gen pk_win_comb = hh_pk_win if sus_round == 6 | sus_round ==7
  replace pk_win_comb = get_pk if sus_round == 5 | sus_round ==8
  la var pk_win_comb "Report Selected"

  * Demographics match
  * Stata evaluates missing == missing as true, so each equality check below
  * also requires both sides to be non-missing. A missing value therefore
  * counts as a non-match, as it already does for agematch (abs(.) <= 2 is false).
  gen agematch = abs(year_dob - year_dob_sus) <=2
  la var agematch "Age Match"
  gen gendermatch = (gender == gender_sus) & !missing(gender, gender_sus)
  la var gendermatch "Gender Match"
  gen educmatch = (educ_pmo == educ_sus) & !missing(educ_pmo, educ_sus)
  la var educmatch "Educ Match"
  gen match3_sus = agematch == 1 & gendermatch == 1 & educmatch == 1 
  la var match3_sus "Age, Gender, Educ Match"

  * Panel A Susenas, All Admin Appliers
  * t5 = all rounds stacked; t6-t9 = rounds 5-8
  local sus_matchvars agematch gendermatch educmatch match3_sus
  eststo t5: quietly estpost summarize `sus_matchvars'
  forvalues wave = 5/8 {
    local col = `wave' + 1
    eststo t`col': quietly estpost summarize `sus_matchvars' if sus_round == `wave'
  }

***** Make Table ******
  * The Susenas data in memory has no provincematch or match4. Create labelled
  * all-missing placeholders, presumably so that esttab's label option can
  * still find labels for those Sakernas-only rows.
  gen provincematch = .
  la var provincematch "Province Match"

  gen match4 = .
  la var match4 "Age, Gender, Province, Educ Match"

  * Output paths are quoted so the export still works when $KP_output
  * contains spaces.

  * Panel A: Sakernas columns t1-t4 and Susenas columns t5-t9; creates the file
  esttab t1 t2 t3 t4 t5 t6 t7 t8 t9 using "$KP_output/tables/descriptive/tex/TableA12.tex", ///
  cells("mean(pattern(1 1 1 1 1 1 1 1 1) fmt(a2))") ///
  replace label nonum ///
  mtitle("Stacked" "Aug '20" "Feb '21" "Aug '21" "Stacked" "Sep '20" "Mar '21" "Sep '21" "Mar '22") /// 
  mgroups("Workforce Survey Wave" "Welfare Survey Wave", pattern(1 0 0 0 1 0 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
  prehead(`"\hline \hline"') ///
  posthead(`"& (1) & (2) & (3) & (4) & (5) & (6) & (7) & (8) & (9) \\"' `"\hline \\ "' `"\textit{Panel A: Workforce and Welfare Surveys} \\"') ///
  prefoot(`"\\ "') ///
  postfoot(`"\hline \\ "') ///
  stats(N, label(Observations) fmt(%9.0fc)) ///
  collabels(none)

  * Panel B: Sakernas, report_applied == 1 (t10-t13); appended to the same file
  esttab t10 t11 t12 t13 using "$KP_output/tables/descriptive/tex/TableA12.tex", ///
  cells("mean(pattern(1 1 1 1) fmt(a2))") ///
  append label ///
  nomtitle nonum /// 
  prehead(`""') ///
  posthead(`"\textit{Panel B: Workforce Survey, Report Applied} \\"') ///
  prefoot(`"\\ "') ///
  postfoot(`"\hline \\ "') ///
  stats(N, label(Observations) fmt(%9.0fc)) ///
  collabels(none)

  * Panel C: Sakernas, report_applied == 0 (t14-t17); appended, then the
  * Batches row and closing rule are written as the table footer
  esttab t14 t15 t16 t17 using "$KP_output/tables/descriptive/tex/TableA12.tex", ///
  cells("mean(pattern(1 1 1 1) fmt(a2))") ///
  append label ///
  nomtitle nonum /// 
  prehead(`""') ///
  prefoot(`"\\ "') ///
  posthead(`"\textit{Panel C: Workforce Survey, Report Didn't Apply} \\"') ///
  postfoot(`"Batches & 2-14, 16-17 & 2-3 & 2-11 & 2-14, 16-17 & 2-14, 16-22 & 2-5 & 2-11 & 2-14, 16-18 & 2-14, 16-22 \\"' `"\hline"') ///
  stats(N, label(Observations) fmt(%9.0fc)) ///
  collabels(none)

  * Close the log opened at the top of this do-file
  log close